"""
图片伪装反爬虫
图片伪装反爬虫绕过实战：http://www.porters.vip/confusion/recruit.html，获取详情页中的企业名称和联系电话
"""
import io
import requests
from urllib.parse import urljoin
from lxml import etree
try:
    from PIL import Image
except ImportError:
    import Image
import pytesseract

# Detail page to scrape; the phone number is taken from an image it references.
url = 'http://www.porters.vip/confusion/recruit.html'
# Upper bound (seconds) for each HTTP request; requests has no timeout by default.
REQUEST_TIMEOUT = 10

res = requests.get(url, timeout=REQUEST_TIMEOUT)
res.raise_for_status()  # fail fast on an HTTP error instead of parsing an error page
parser = etree.HTMLParser(encoding='utf-8')
# Feed the raw bytes so the parser's explicit UTF-8 setting does the decoding;
# res.text depends on requests' charset guess, which can garble non-ASCII text.
html = etree.HTML(res.content, parser=parser)

# Company name: first text node under <h1 class="interval">.
name_nodes = html.xpath('//h1[@class="interval"]//text()')
if not name_nodes:
    raise IndexError('no text found under //h1[@class="interval"]')
name = name_nodes[0]  # extracted but not printed by this script

# src attribute of the element with class "pn" (the image holding the phone number).
img_srcs = html.xpath('//*[@class="pn"]/@src')
if not img_srcs:
    raise IndexError('no @src found on an element with class "pn"')
img_ele = img_srcs[0]
img_url = urljoin(url, img_ele)  # resolve a possibly relative src against the page URL

# Download the image and keep its raw bytes.
img_res = requests.get(img_url, timeout=REQUEST_TIMEOUT)
img_res.raise_for_status()
img_body = img_res.content
# Wrap the bytes in an in-memory file so Image.open can read them as an image.
image_stream = Image.open(io.BytesIO(img_body))
# Run OCR on the image object and print the recognized text.
result = pytesseract.image_to_string(image_stream)
print(result)  # original author's note: expected output is 400-88888888
